"""Script to update kpet-db with the tests Polarion ID."""

import argparse
from io import BytesIO
import json
import logging
import os
import pathlib
import re
import subprocess
import sys
import tarfile
import tempfile
import typing
from typing import cast
import uuid
import zipfile

from cki_lib import logger
from cki_lib import misc
from cki_lib import session
from cki_lib import yaml
import sentry_sdk

# Module-wide logger obtained from the cki_lib logger helper.
LOGGER: logging.Logger = logger.get_logger('cki_tools.sync_polarion_id')
# Shared HTTP session used for every origin download in this module.
# NOTE(review): raise_for_status=True presumably turns HTTP error responses into
# exceptions - confirm against cki_lib.session.
SESSION = session.get_session('cki_tools.sync_polarion_id', raise_for_status=True)


def get_origins(kpet_db_path: str) -> dict[str, str]:
    """Map every kpet-db origin to the default of its '${origin}_url' variable.

    The database index is read from ``{kpet_db_path}/index.yaml``. A variable is
    associated with an origin when ``'{origin}_url'`` is found (regex search) in
    the variable name; if several variables match, the last one wins. Origins
    without a matching variable are left out of the result.
    """
    index = yaml.load(file_path=f'{kpet_db_path}/index.yaml')
    url_by_origin = {}
    for origin in index['origins']:
        for var_name, var_spec in index['variables'].items():
            if re.search(f'{origin}_url', var_name) is None:
                continue
            url_by_origin[origin] = var_spec['default']
    return url_by_origin


def has_fmf(test_dir: str) -> bool:
    """Tell whether test_dir contains a '.fmf' directory, i.e. an fmf tree root."""
    fmf_marker = f'{test_dir}/.fmf'
    return os.path.isdir(fmf_marker)


def get_tmt_tests(test_dir: str) -> list[typing.Any]:
    """Export every tmt test found under test_dir.

    Runs ``tmt test export --how json`` with test_dir as working directory and
    returns the decoded JSON output. When the command exits with a non-zero
    status the error is logged and an empty list is returned instead.
    """
    command = ['tmt', 'test', 'export', '--how', 'json']
    LOGGER.debug('Going to execute: %s', " ".join(command))
    try:
        completed = subprocess.run(command, capture_output=True, check=True,
                                   cwd=test_dir, encoding='utf8')
    except subprocess.CalledProcessError as error:
        LOGGER.error('Failed to run tmt command (%s). Skipping...', error.stderr)
        return []
    exported = json.loads(completed.stdout)
    return cast(list[typing.Any], exported)


def get_suitable_test(test_name: str, tmt_tests: list,
                      kpet_environment: typing.Optional[dict]) -> typing.Optional[dict]:
    """From the tmt tests, return the most suitable match.

    Args:
        test_name: kpet-db test location; tmt test names are matched against
            ``'/' + test_name`` from the start of the name.
        tmt_tests: test entries as exported by tmt; each entry needs a 'name'
            and may carry an 'environment' mapping.
        kpet_environment: environment variables set for the test in kpet-db;
            None is treated like an empty mapping.

    Returns:
        The single matching tmt entry, or None when nothing matches or when the
        match stays ambiguous.
    """
    kpet_environment = kpet_environment or {}

    # NOTE(review): test_name is interpolated unescaped, so it acts as a regex
    # prefix (e.g. 'foo' also matches '/foobar') - confirm this is intended.
    test_entries = [entry for entry in tmt_tests
                    if re.match(r'/' + f'{test_name}', entry['name'])]

    selected_tests = []
    for entry in test_entries:
        skip_entry = False
        # The test may expect specific environment variables; every one of them
        # has to be set to the same value in the kpet-db configuration.
        for env, expected in (entry.get('environment') or {}).items():
            if env not in kpet_environment:
                LOGGER.debug('skip: test %s: env %s is not set in kpet-db (%s).',
                             entry['name'], env, kpet_environment)
                skip_entry = True
            elif expected != kpet_environment[env]:
                LOGGER.debug('skip test %s: env %s is not the same in kpet-db (%s).',
                             entry['name'], env, kpet_environment)
                skip_entry = True
        if not skip_entry:
            selected_tests.append(entry)

    if len(selected_tests) > 1:
        # More than one candidate is left: prefer the ones that declare (and
        # therefore matched) environment variables over the generic ones.
        LOGGER.debug('multiple tmt test defined, removing the one without environment')
        # 'environment' is optional (see the lookup above), so use .get() here
        # too instead of raising KeyError on entries that lack the key.
        selected_tests[:] = [x for x in selected_tests if x.get('environment')]

    if not selected_tests:
        return None

    if len(selected_tests) == 1:
        LOGGER.debug('selected tmt test %s', selected_tests[0])
        return selected_tests[0]

    LOGGER.error('there is more than one tmt test defined for %s', test_name)
    return None


def get_test_id(test_name: str, tmt_tests: list, kpet_environment: str) -> str:
    # pylint: disable=R0911
    """Return the fmf 'id' of the tmt test matching test_name, if it is usable.

    The id is only returned when a single suitable tmt test exists, it carries
    a non-empty 'id' and that id parses as a version 4 UUID. In every other
    case the reason is logged and None is returned.
    """
    match = get_suitable_test(test_name, tmt_tests, kpet_environment)
    if not match:
        LOGGER.debug('there is no tmt test defined for %s', test_name)
        return None

    candidate_id = match.get('id')
    if not candidate_id:
        LOGGER.debug('Found test %s does not have id.', match['name'])
        return None

    try:
        parsed = uuid.UUID(candidate_id)
    except ValueError:
        LOGGER.warning('ID (%s) for test %s is not a valid UUID.', candidate_id, test_name)
        return None

    if parsed.version != 4:
        LOGGER.warning('UUID (%s) for test %s is not version 4.', candidate_id, test_name)
        return None
    return candidate_id


def extract_origin(origin_url: str, ext_dir: str) -> None:
    """Fetch the zip archive at origin_url and unpack all of it into ext_dir."""
    LOGGER.info('Downloading repo: %s', origin_url)
    response = SESSION.get(url=origin_url)
    # The whole archive is held in memory; zipfile needs a seekable file object.
    archive = BytesIO(response.content)
    with zipfile.ZipFile(archive) as zipf:
        LOGGER.info('extracting repo to %s', ext_dir)
        zipf.extractall(path=ext_dir)


def extract_origin_targz(origin_url: str, ext_dir: str) -> None:
    """Download origin_url and extract all fmf meta data files.

    The gzipped tarball is read as a stream straight from the HTTP response.
    Only members whose name ends with '.fmf' or that live below a '.fmf/'
    directory are written to ext_dir.
    """
    LOGGER.info('Extracting repo %s to %s', origin_url, ext_dir)
    # The archive comes from the network: use the 'data' extraction filter so a
    # member cannot be written outside ext_dir (absolute paths, '..', links).
    # Extraction filters exist on Python 3.12+ and on patched older releases,
    # hence the feature check recommended by the tarfile documentation.
    extract_kwargs = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}
    with (SESSION.get(url=origin_url, stream=True) as req,
          tarfile.open(fileobj=req.raw, mode="r|gz") as tarf):
        for tarinfo in tarf:
            if tarinfo.name.endswith('.fmf') or '.fmf/' in tarinfo.name:
                tarf.extract(tarinfo, ext_dir, **extract_kwargs)


def list_tests(kpet_db_path: str) -> list[dict[str, typing.Any]]:
    """Return the tests known to kpet-db.

    Invokes ``python3 -m kpet --db <kpet_db_path> test list -o json`` and
    returns its decoded JSON output. A non-zero exit status raises
    subprocess.CalledProcessError.
    """
    command = ['python3', '-m', 'kpet']
    command += ['--db', kpet_db_path]
    command += ['test', 'list']
    command += ['-o', 'json']
    completed = subprocess.run(command, capture_output=True, check=True)
    listing = json.loads(completed.stdout)
    return typing.cast(list[dict[str, typing.Any]], listing)


def populate_tests_id(kpet_db_path: str) -> dict[list]:
    """Group the kpet-db tests by origin and attach the Polarion ID to each.

    Every distinct (location, environment) pair listed by kpet is recorded
    under its origin. Each origin archive is then downloaded into a temporary
    directory and, when it holds an fmf tree, the matching tmt test id is
    stored under the 'id' key of the test entry. Entries without a usable id
    are returned unchanged.
    """
    tests: dict[str, list[dict[str, str]]] = {}
    for kpet_test in list_tests(kpet_db_path):
        candidate = {
            'location': kpet_test['location'],
            'environment': kpet_test['environment'],
        }
        known = tests.setdefault(kpet_test['origin'], [])
        # The same location/environment pair may be listed several times.
        if candidate not in known:
            known.append(candidate)

    origins = get_origins(kpet_db_path)
    for origin, origin_tests in tests.items():
        with tempfile.TemporaryDirectory() as ext_dir:
            origin_url = origins[origin]
            if origin_url.endswith('.tar.gz'):
                extract_origin_targz(origin_url, ext_dir)
            else:
                extract_origin(origin_url, ext_dir)

            extracted = os.listdir(ext_dir)
            if not extracted:
                LOGGER.info('%s does not have any fmf files', origin)
                continue

            # The first extracted entry is taken as the repository root.
            test_dir = f'{ext_dir}/{extracted[0]}'
            if not has_fmf(test_dir):
                LOGGER.info('%s does not have fmf tests', origin)
                continue

            tmt_tests = get_tmt_tests(test_dir)
            for entry in origin_tests:
                test_id = get_test_id(entry['location'], tmt_tests, entry['environment'])
                if test_id:
                    entry['id'] = test_id
                    LOGGER.info('test located at %s got id: %s', entry['location'], entry['id'])
                    if entry['environment']:
                        LOGGER.info('test environment with environment: %s',
                                    entry['environment'])

    return tests


def generate_external_j2(tests: dict, kpet_db_path: str) -> None:
    """Write ``{kpet_db_path}/external.j2`` from the collected test ids.

    The file consists of a fixed Jinja2 comment header followed by a
    ``{% set tests = ... %}`` statement whose value is a JSON object mapping
    origin -> location -> {'id': ...}. Origins are emitted in sorted order and
    tests without an 'id' are left out.
    """
    header = """{################### DO NOT EDIT! ###############
 # Various external data, imported automatically.
 ################################################}

{# External test data, indexed by the origin and the location.
 # Recognized fields:
 #  id:   The test's Polarion ID
 #}
{% set tests =
"""

    outputfile = f'{kpet_db_path}/external.j2'
    LOGGER.info('Creating %s', outputfile)

    ids_by_origin = {}
    for origin in sorted(tests):
        LOGGER.debug('processing tests from origin %s', origin)
        for entry in tests[origin]:
            if 'id' in entry:
                # The origin only shows up in the output once it has an id.
                per_location = ids_by_origin.setdefault(origin, {})
                LOGGER.debug('Adding test %s', entry['location'])
                per_location[entry['location']] = {'id': entry['id']}

    rendered = header + json.dumps(ids_by_origin, indent=4) + "%}"
    pathlib.Path(outputfile).write_text(rendered, encoding='utf8')
    LOGGER.info('Created %s', outputfile)


def main(argv: typing.Optional[typing.List[str]] = None) -> int:
    """Update the kpet-db external.j2 file with the tests Polarion IDs.

    Args:
        argv: command line arguments; None lets argparse read sys.argv.

    Returns:
        0 once external.j2 has been written.
    """
    parser = argparse.ArgumentParser(
        description='Update the kpet-db external.j2 file with the Polarion IDs of the tests')
    parser.add_argument('--kpet-db-path', required=True,
                        help='Path to kpet-db database')
    args = parser.parse_args(argv)

    test_locations = populate_tests_id(args.kpet_db_path)
    generate_external_j2(test_locations, args.kpet_db_path)

    return 0


if __name__ == '__main__':
    # Script entry point: main()'s return value becomes the process exit status.
    # NOTE(review): misc.sentry_init presumably configures Sentry error
    # reporting before the sync starts - confirm against cki_lib.misc.
    misc.sentry_init(sentry_sdk)
    sys.exit(main())
